In [90]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
from plotly.subplots import make_subplots 
from datetime import datetime 
In [91]:
coviddf=pd.read_csv(r"C:\Users\DELL\Desktop\rashmi\Practice projecct\covid19 India\data set\covid_india.csv")
In [92]:
coviddf.head(10)
Out[92]:
S. No. Name of State / UT Active Cases Cured/Discharged/Migrated Deaths Total Confirmed cases
0 1 Andaman and Nicobar 4 7408 129 7541
1 2 Andhra Pradesh 20593 1944267 13490 1978350
2 3 Arunachal Pradesh 3032 46399 237 49668
3 4 Assam 11719 555470 5357 572546
4 5 Bihar 357 715119 9646 725122
5 6 Chandigarh 27 61132 811 61970
6 7 Chhattisgarh 1780 987642 13536 1002958
7 8 Dadra and Nagar Haveli and Daman and Diu 12 10636 4 10652
8 9 Delhi 516 1411042 25065 1436623
9 10 Goa 992 167556 3157 171705
In [93]:
coviddf.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 36 entries, 0 to 35
Data columns (total 6 columns):
 #   Column                     Non-Null Count  Dtype 
---  ------                     --------------  ----- 
 0   S. No.                     36 non-null     int64 
 1   Name of State / UT         36 non-null     object
 2   Active Cases               36 non-null     int64 
 3   Cured/Discharged/Migrated  36 non-null     int64 
 4   Deaths                     36 non-null     int64 
 5   Total Confirmed cases      36 non-null     int64 
dtypes: int64(5), object(1)
memory usage: 1.8+ KB
In [94]:
coviddf.describe()
Out[94]:
S. No. Active Cases Cured/Discharged/Migrated Deaths Total Confirmed cases
count 36.000000 36.000000 3.600000e+01 36.000000 3.600000e+01
mean 18.500000 11448.694444 8.626628e+05 11871.416667 8.859829e+05
std 10.535654 31839.162320 1.239847e+06 22952.890569 1.282199e+06
min 1.000000 4.000000 7.408000e+03 4.000000 7.541000e+03
25% 9.750000 326.000000 6.136675e+04 799.750000 6.657275e+04
50% 18.500000 1387.000000 4.487250e+05 5243.500000 4.599410e+05
75% 27.250000 9166.000000 9.721065e+05 13501.500000 9.892878e+05
max 36.000000 178722.000000 6.130137e+06 133717.000000 6.341759e+06
In [95]:
vaccinedf=pd.read_csv(r"C:\Users\DELL\Desktop\rashmi\Practice projecct\covid19 India\data set\cowin_vaccine_data_statewise.csv")
In [96]:
vaccinedf.head(7)
Out[96]:
Updated On State Total Individuals Vaccinated Total Sessions Conducted Total Sites First Dose Administered Second Dose Administered Male(Individuals Vaccinated) Female(Individuals Vaccinated) Transgender(Individuals Vaccinated) Total Covaxin Administered Total CoviShield Administered Total Sputnik V Administered AEFI 18-45 years (Age) 45-60 years (Age) 60+ years (Age) Total Doses Administered
0 16/01/2021 India 48276.0 3455.0 2957.0 48276.0 0.0 23757.0 24517.0 2.0 579.0 47697.0 NaN NaN NaN NaN NaN 48276.0
1 17/01/2021 India 58604.0 8532.0 4954.0 58604.0 0.0 27348.0 31252.0 4.0 635.0 57969.0 NaN NaN NaN NaN NaN 58604.0
2 18/01/2021 India 99449.0 13611.0 6583.0 99449.0 0.0 41361.0 58083.0 5.0 1299.0 98150.0 NaN NaN NaN NaN NaN 99449.0
3 19/01/2021 India 195525.0 17855.0 7951.0 195525.0 0.0 81901.0 113613.0 11.0 3017.0 192508.0 NaN NaN NaN NaN NaN 195525.0
4 20/01/2021 India 251280.0 25472.0 10504.0 251280.0 0.0 98111.0 153145.0 24.0 3946.0 247334.0 NaN NaN NaN NaN NaN 251280.0
5 21/01/2021 India 365965.0 32226.0 12600.0 365965.0 0.0 132784.0 233143.0 38.0 5367.0 360598.0 NaN NaN NaN NaN NaN 365965.0
6 22/01/2021 India 549381.0 36988.0 14115.0 549381.0 0.0 193899.0 355402.0 80.0 8128.0 541253.0 NaN NaN NaN NaN NaN 549381.0
In [97]:
coviddf.drop(["S. No."],inplace=True,axis=1)
In [98]:
coviddf.head(5)
Out[98]:
Name of State / UT Active Cases Cured/Discharged/Migrated Deaths Total Confirmed cases
0 Andaman and Nicobar 4 7408 129 7541
1 Andhra Pradesh 20593 1944267 13490 1978350
2 Arunachal Pradesh 3032 46399 237 49668
3 Assam 11719 555470 5357 572546
4 Bihar 357 715119 9646 725122
In [99]:
statewise=pd.pivot_table(coviddf,values=['Total Confirmed cases','Cured/Discharged/Migrated','Deaths'],
                        index='Name of State / UT',aggfunc=max)
In [100]:
statewise['recovery rate']=statewise['Cured/Discharged/Migrated']*100/statewise['Total Confirmed cases']
In [101]:
statewise['mortality rate']=statewise['Deaths']*100/statewise['Total Confirmed cases']
In [102]:
statewise=statewise.sort_values(by="Total Confirmed cases",ascending=False)
In [103]:
statewise.style.background_gradient(cmap="cubehelix")
Out[103]:
  Cured/Discharged/Migrated Deaths Total Confirmed cases recovery rate mortality rate
Name of State / UT          
Maharashtra 6130137 133717 6341759 96.663039 2.108516
Kerala 3317314 17515 3513551 94.414853 0.498499
Karnataka 2854222 36741 2915317 97.904345 1.260275
Tamil Nadu 2516938 34260 2571383 97.882657 1.332357
Andhra Pradesh 1944267 13490 1978350 98.277201 0.681881
Uttar Pradesh 1685299 22771 1708689 98.631114 1.332659
West Bengal 1503535 18202 1532379 98.117698 1.187826
Delhi 1411042 25065 1436623 98.219366 1.744717
Chhattisgarh 987642 13536 1002958 98.472917 1.349608
Odisha 966928 6302 984731 98.192095 0.639972
Rajasthan 944606 8954 953793 99.036793 0.938778
Gujarat 814720 10077 825001 98.753820 1.221453
Madhya Pradesh 781265 10514 791937 98.652418 1.327631
Haryana 759705 9647 770042 98.657606 1.252789
Bihar 715119 9646 725122 98.620508 1.330259
Telengana 635895 3819 648388 98.073222 0.588999
Punjab 582580 16312 599365 97.199536 2.721547
Assam 555470 5357 572546 97.017532 0.935645
Jharkhand 341980 5130 347336 98.457977 1.476956
Uttarakhand 334456 7367 342336 97.698168 2.151979
Jammu and Kashmir 316496 4386 322286 98.203459 1.360903
Himachal Pradesh 202084 3533 207344 97.463153 1.703932
Goa 167556 3157 171705 97.583646 1.838619
Puducherry 118750 1799 121421 97.800216 1.481622
Manipur 92894 1628 102889 90.285648 1.582288
Tripura 76667 766 79948 95.896082 0.958123
Meghalaya 61445 1147 68107 90.218333 1.684115
Chandigarh 61132 811 61970 98.647733 1.308698
Arunachal Pradesh 46399 237 49668 93.418297 0.477168
Mizoram 30500 161 43530 70.066621 0.369860
Nagaland 26493 582 28445 93.137634 2.046054
Sikkim 24050 352 27652 86.973817 1.272964
Ladakh 20106 207 20378 98.665227 1.015801
Dadra and Nagar Haveli and Daman and Diu 10636 4 10652 99.849793 0.037552
Lakshadweep 10125 50 10243 98.847994 0.488138
Andaman and Nicobar 7408 129 7541 98.236308 1.710648
In [104]:
### top 10 active cases states
top_10_state_active=coviddf.groupby(by='Name of State / UT').max()['Active Cases'].reset_index()
top_10=top_10_state_active.sort_values(by=['Active Cases'],ascending=False).head(10)
top_10
Out[104]:
Name of State / UT Active Cases
16 Kerala 178722
20 Maharashtra 77905
15 Karnataka 24354
1 Andhra Pradesh 20593
30 Tamil Nadu 20185
23 Mizoram 12869
3 Assam 11719
25 Odisha 11501
35 West Bengal 10642
31 Telengana 8674
In [110]:
fig=plt.figure(figsize=(16,9))
sns.barplot(x = "Name of State / UT", y = "Active Cases", data = top_10)
plt.title("top 10 active cases states")
plt.xlabel("states")
plt.ylabel("active cases")
plt.show()
In [111]:
## top 10 state having highest death
top_10_state_death=coviddf.groupby(by='Name of State / UT').max()['Deaths'].reset_index()
top_10_death=top_10_state_death.sort_values(by=['Deaths'],ascending=False).head(10)
top_10_death
Out[111]:
Name of State / UT Deaths
20 Maharashtra 133717
15 Karnataka 36741
30 Tamil Nadu 34260
8 Delhi 25065
33 Uttar Pradesh 22771
35 West Bengal 18202
16 Kerala 17515
27 Punjab 16312
6 Chhattisgarh 13536
1 Andhra Pradesh 13490
In [114]:
fig=plt.figure(figsize=(16,9))
sns.barplot(x = "Name of State / UT", y = "Deaths", data = top_10_death)
plt.title("top 10 death cases states")
plt.xlabel("states")
plt.ylabel("death")
plt.show()
In [115]:
vaccinedf.head(5)
Out[115]:
Updated On State Total Individuals Vaccinated Total Sessions Conducted Total Sites First Dose Administered Second Dose Administered Male(Individuals Vaccinated) Female(Individuals Vaccinated) Transgender(Individuals Vaccinated) Total Covaxin Administered Total CoviShield Administered Total Sputnik V Administered AEFI 18-45 years (Age) 45-60 years (Age) 60+ years (Age) Total Doses Administered
0 16/01/2021 India 48276.0 3455.0 2957.0 48276.0 0.0 23757.0 24517.0 2.0 579.0 47697.0 NaN NaN NaN NaN NaN 48276.0
1 17/01/2021 India 58604.0 8532.0 4954.0 58604.0 0.0 27348.0 31252.0 4.0 635.0 57969.0 NaN NaN NaN NaN NaN 58604.0
2 18/01/2021 India 99449.0 13611.0 6583.0 99449.0 0.0 41361.0 58083.0 5.0 1299.0 98150.0 NaN NaN NaN NaN NaN 99449.0
3 19/01/2021 India 195525.0 17855.0 7951.0 195525.0 0.0 81901.0 113613.0 11.0 3017.0 192508.0 NaN NaN NaN NaN NaN 195525.0
4 20/01/2021 India 251280.0 25472.0 10504.0 251280.0 0.0 98111.0 153145.0 24.0 3946.0 247334.0 NaN NaN NaN NaN NaN 251280.0
In [120]:
vaccinedf.rename(columns={'Updated On':'vaccine_date'},inplace=True)
In [122]:
vaccinedf.head(5)
Out[122]:
vaccine_date State Total Individuals Vaccinated Total Sessions Conducted Total Sites First Dose Administered Second Dose Administered Male(Individuals Vaccinated) Female(Individuals Vaccinated) Transgender(Individuals Vaccinated) Total Covaxin Administered Total CoviShield Administered Total Sputnik V Administered AEFI 18-45 years (Age) 45-60 years (Age) 60+ years (Age) Total Doses Administered
0 16/01/2021 India 48276.0 3455.0 2957.0 48276.0 0.0 23757.0 24517.0 2.0 579.0 47697.0 NaN NaN NaN NaN NaN 48276.0
1 17/01/2021 India 58604.0 8532.0 4954.0 58604.0 0.0 27348.0 31252.0 4.0 635.0 57969.0 NaN NaN NaN NaN NaN 58604.0
2 18/01/2021 India 99449.0 13611.0 6583.0 99449.0 0.0 41361.0 58083.0 5.0 1299.0 98150.0 NaN NaN NaN NaN NaN 99449.0
3 19/01/2021 India 195525.0 17855.0 7951.0 195525.0 0.0 81901.0 113613.0 11.0 3017.0 192508.0 NaN NaN NaN NaN NaN 195525.0
4 20/01/2021 India 251280.0 25472.0 10504.0 251280.0 0.0 98111.0 153145.0 24.0 3946.0 247334.0 NaN NaN NaN NaN NaN 251280.0
In [123]:
vaccinedf.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5365 entries, 0 to 5364
Data columns (total 18 columns):
 #   Column                               Non-Null Count  Dtype  
---  ------                               --------------  -----  
 0   vaccine_date                         5365 non-null   object 
 1   State                                5365 non-null   object 
 2   Total Individuals Vaccinated         5360 non-null   float64
 3   Total Sessions Conducted             5360 non-null   float64
 4   Total Sites                          5360 non-null   float64
 5   First Dose Administered              5360 non-null   float64
 6   Second Dose Administered             5360 non-null   float64
 7   Male(Individuals Vaccinated)         5360 non-null   float64
 8   Female(Individuals Vaccinated)       5360 non-null   float64
 9   Transgender(Individuals Vaccinated)  5360 non-null   float64
 10  Total Covaxin Administered           5360 non-null   float64
 11  Total CoviShield Administered        5360 non-null   float64
 12  Total Sputnik V Administered         734 non-null    float64
 13  AEFI                                 3179 non-null   float64
 14  18-45 years (Age)                    3174 non-null   float64
 15  45-60 years (Age)                    3175 non-null   float64
 16  60+ years (Age)                      3175 non-null   float64
 17  Total Doses Administered             5364 non-null   float64
dtypes: float64(16), object(2)
memory usage: 754.6+ KB
In [127]:
vaccinedf.isnull().sum()
Out[127]:
vaccine_date                              0
State                                     0
Total Individuals Vaccinated              5
Total Sessions Conducted                  5
Total Sites                               5
First Dose Administered                   5
Second Dose Administered                  5
Male(Individuals Vaccinated)              5
Female(Individuals Vaccinated)            5
Transgender(Individuals Vaccinated)       5
Total Covaxin Administered                5
Total CoviShield Administered             5
Total Sputnik V Administered           4631
AEFI                                   2186
18-45 years (Age)                      2191
45-60 years (Age)                      2190
60+ years (Age)                        2190
Total Doses Administered                  1
dtype: int64
In [129]:
vaccine=vaccinedf.drop(columns=['Total Sputnik V Administered','AEFI','18-45 years (Age)',
                               '45-60 years (Age)','60+ years (Age)'],axis=1)
In [130]:
vaccine.head(5)
Out[130]:
vaccine_date State Total Individuals Vaccinated Total Sessions Conducted Total Sites First Dose Administered Second Dose Administered Male(Individuals Vaccinated) Female(Individuals Vaccinated) Transgender(Individuals Vaccinated) Total Covaxin Administered Total CoviShield Administered Total Doses Administered
0 16/01/2021 India 48276.0 3455.0 2957.0 48276.0 0.0 23757.0 24517.0 2.0 579.0 47697.0 48276.0
1 17/01/2021 India 58604.0 8532.0 4954.0 58604.0 0.0 27348.0 31252.0 4.0 635.0 57969.0 58604.0
2 18/01/2021 India 99449.0 13611.0 6583.0 99449.0 0.0 41361.0 58083.0 5.0 1299.0 98150.0 99449.0
3 19/01/2021 India 195525.0 17855.0 7951.0 195525.0 0.0 81901.0 113613.0 11.0 3017.0 192508.0 195525.0
4 20/01/2021 India 251280.0 25472.0 10504.0 251280.0 0.0 98111.0 153145.0 24.0 3946.0 247334.0 251280.0
In [132]:
#male vs female vaccination
male=vaccine['Male(Individuals Vaccinated)'].sum()
female=vaccine['Female(Individuals Vaccinated)'].sum()
px.pie(names=["male","female"],values=[male,female],title="male and female vaccination")
In [136]:
# remove rows where state is India
vaccine1=vaccinedf[vaccinedf.State!='India']
vaccine1
Out[136]:
vaccine_date State Total Individuals Vaccinated Total Sessions Conducted Total Sites First Dose Administered Second Dose Administered Male(Individuals Vaccinated) Female(Individuals Vaccinated) Transgender(Individuals Vaccinated) Total Covaxin Administered Total CoviShield Administered Total Sputnik V Administered AEFI 18-45 years (Age) 45-60 years (Age) 60+ years (Age) Total Doses Administered
145 16/01/2021 Andaman and Nicobar Islands 23.0 2.0 2.0 23.0 0.0 12.0 11.0 0.0 0.0 23.0 NaN NaN NaN NaN NaN 23.0
146 17/01/2021 Andaman and Nicobar Islands 23.0 2.0 2.0 23.0 0.0 12.0 11.0 0.0 0.0 23.0 NaN NaN NaN NaN NaN 23.0
147 18/01/2021 Andaman and Nicobar Islands 42.0 9.0 2.0 42.0 0.0 29.0 13.0 0.0 0.0 42.0 NaN NaN NaN NaN NaN 42.0
148 19/01/2021 Andaman and Nicobar Islands 89.0 12.0 2.0 89.0 0.0 53.0 36.0 0.0 0.0 89.0 NaN NaN NaN NaN NaN 89.0
149 20/01/2021 Andaman and Nicobar Islands 124.0 16.0 3.0 124.0 0.0 67.0 57.0 0.0 0.0 124.0 NaN NaN NaN NaN NaN 124.0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
5360 05/06/2021 West Bengal 12090072.0 981547.0 2517.0 12090072.0 3941080.0 6784722.0 5303588.0 1762.0 1806377.0 14224775.0 0.0 1211.0 2999339.0 4927157.0 4159589.0 16031152.0
5361 06/06/2021 West Bengal 12206706.0 479793.0 1016.0 12206706.0 3943243.0 6851075.0 5353848.0 1783.0 1825771.0 14324178.0 0.0 1214.0 3058135.0 4968447.0 4175911.0 16149949.0
5362 07/06/2021 West Bengal 12492937.0 1062959.0 2523.0 12492937.0 3960942.0 7014307.0 5476794.0 1836.0 1878776.0 14575103.0 0.0 1223.0 3174029.0 5087762.0 4226545.0 16453879.0
5363 08/06/2021 West Bengal 12742698.0 1026098.0 2358.0 12742698.0 3974349.0 7157564.0 5583273.0 1861.0 1931666.0 14785235.0 146.0 1238.0 3290866.0 5179191.0 4267590.0 16717047.0
5364 09/06/2021 West Bengal 12954543.0 887059.0 1952.0 12954543.0 3986376.0 7279703.0 5672936.0 1904.0 1985988.0 14954421.0 510.0 1254.0 3411008.0 5242947.0 4294961.0 16940919.0

5220 rows × 18 columns

In [137]:
vaccine1.rename(columns={'Total Doses Administered':'total'},inplace=True)
vaccine1.head(5)
C:\Users\DELL\anaconda3\lib\site-packages\pandas\core\frame.py:5039: SettingWithCopyWarning:


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

Out[137]:
vaccine_date State Total Individuals Vaccinated Total Sessions Conducted Total Sites First Dose Administered Second Dose Administered Male(Individuals Vaccinated) Female(Individuals Vaccinated) Transgender(Individuals Vaccinated) Total Covaxin Administered Total CoviShield Administered Total Sputnik V Administered AEFI 18-45 years (Age) 45-60 years (Age) 60+ years (Age) total
145 16/01/2021 Andaman and Nicobar Islands 23.0 2.0 2.0 23.0 0.0 12.0 11.0 0.0 0.0 23.0 NaN NaN NaN NaN NaN 23.0
146 17/01/2021 Andaman and Nicobar Islands 23.0 2.0 2.0 23.0 0.0 12.0 11.0 0.0 0.0 23.0 NaN NaN NaN NaN NaN 23.0
147 18/01/2021 Andaman and Nicobar Islands 42.0 9.0 2.0 42.0 0.0 29.0 13.0 0.0 0.0 42.0 NaN NaN NaN NaN NaN 42.0
148 19/01/2021 Andaman and Nicobar Islands 89.0 12.0 2.0 89.0 0.0 53.0 36.0 0.0 0.0 89.0 NaN NaN NaN NaN NaN 89.0
149 20/01/2021 Andaman and Nicobar Islands 124.0 16.0 3.0 124.0 0.0 67.0 57.0 0.0 0.0 124.0 NaN NaN NaN NaN NaN 124.0
In [139]:
# most vaccinated state
max_vac=vaccine1.groupby('State')['total'].sum().to_frame('total')
max_vac=max_vac.sort_values('total',ascending=False)[:5]
In [140]:
max_vac
Out[140]:
total
State
Maharashtra 1.293763e+09
Uttar Pradesh 1.081672e+09
Rajasthan 1.055760e+09
Gujarat 1.033493e+09
West Bengal 9.009359e+08
In [143]:
fig=plt.figure(figsize=(10,5))
sns.barplot(x = max_vac.index, y = max_vac.total, data = max_vac)
plt.title("top 5 vaccinated states")
plt.xlabel("states")
plt.ylabel("vaccination")
plt.show()
In [144]:
# least vaccinated state
least_vac=vaccine1.groupby('State')['total'].sum().to_frame('total')
least_vac=least_vac.sort_values('total',ascending=True)[:5]
In [145]:
least_vac
Out[145]:
total
State
Lakshadweep 1861077.0
Andaman and Nicobar Islands 7189806.0
Dadra and Nagar Haveli and Daman and Diu 7829204.0
Ladakh 9114076.0
Sikkim 15267706.0
In [146]:
fig=plt.figure(figsize=(10,5))
sns.barplot(x = least_vac.index, y = least_vac.total, data = least_vac)
plt.title("least 5 vaccinated states")
plt.xlabel("states")
plt.ylabel("vaccination")
plt.show()
In [ ]: